In [ ]:
from pathlib import Path

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from planet4 import io, markings, plotting, catalog_production
In [ ]:
import seaborn as sns
# Start with the 'paper' plotting context; later cells switch the context again.
sns.set_context('paper')
# color_codes=True remaps matplotlib's single-letter color codes to this palette.
sns.set_palette('bright', color_codes=True)
In [ ]:
def get_gold_ids(person, gold_dir=None):
    """Read the gold-data ID list stored for one person.

    Parameters
    ----------
    person : {"GP", "MES", "KMA", "common_gold_data"}
        Stem of the text file to read (``<person>.txt``).
    gold_dir : str or pathlib.Path, optional
        Folder holding the ID files. Defaults to the hard-coded local
        ``gold_data`` folder this notebook was written against.

    Returns
    -------
    pd.Series
        The content of the header-less file, squeezed to a Series when it
        has a single column.
    """
    if gold_dir is None:
        gold_dir = "/Users/klay6683/Dropbox/Documents/latex_docs/p4_paper1/gold_data"
    # read_csv's ``squeeze`` keyword was deprecated in pandas 1.4 and removed
    # in 2.0; squeezing the parsed frame works on old and new versions alike.
    frame = pd.read_csv(Path(gold_dir) / f"{person}.txt", header=None)
    return frame.squeeze("columns")
class DataPipe:
    """Gather expert ("gold") markings and catalog results for tile IDs.

    Attributes
    ----------
    gold_names : dict
        Maps expert initials to a 2-tuple. The first entry is the
        ``user_name`` used to query the markings; the second entry is not
        used by this class.
    rm : catalog_production.ReleaseManager
        Release whose ``savefolder`` is searched for per-tile catalog files.
    db : io.DBManager
        Source of the markings per tile.
    """

    gold_names = dict(
        GP=("Portyankina", 'anya'),
        MES=('mschwamb', 'meg'),
        KMA=('michaelaye', 'michael'),
    )

    def __init__(self, version='v1.0'):
        self.rm = catalog_production.ReleaseManager(version)
        self.db = io.DBManager()

    @property
    def anyas_ids(self):
        """Gold IDs stored under 'GP' (re-read from disk on every access)."""
        return get_gold_ids('GP')

    @property
    def megs_ids(self):
        """Gold IDs stored under 'MES' (re-read from disk on every access)."""
        return get_gold_ids('MES')

    @property
    def michaels_ids(self):
        """Gold IDs stored under 'KMA' (re-read from disk on every access)."""
        return get_gold_ids('KMA')

    @property
    def common_ids(self):
        """Gold IDs stored under 'common_gold_data'."""
        return get_gold_ids('common_gold_data')

    def get_catalog_fans_for_id(self, id_):
        """Return the final fan table for `id_`, or None if its file is missing."""
        pm = io.PathManager(id_=id_, datapath=self.rm.savefolder)
        return pm.final_fandf if pm.final_fanfile.exists() else None

    def get_catalog_blotches_for_id(self, id_):
        """Return the final blotch table for `id_`, or None if its file is missing."""
        pm = io.PathManager(id_=id_, datapath=self.rm.savefolder)
        return pm.final_blotchdf if pm.final_blotchfile.exists() else None

    def get_catalog_data_for_id(self, id_):
        """Return fans and blotches of one tile in a single table.

        A ``marking`` column ('fan' / 'blotch') is added to tell the rows
        apart. Returns None when the tile has neither a fan nor a blotch file.
        """
        fans = self.get_catalog_fans_for_id(id_)
        if fans is not None:
            fans['marking'] = 'fan'
        blotches = self.get_catalog_blotches_for_id(id_)
        if blotches is not None:
            blotches['marking'] = 'blotch'
        found = [df for df in (fans, blotches) if df is not None]
        if not found:
            return None
        return pd.concat(found, ignore_index=True, sort=True)

    def get_catalog_data_for_ids(self, ids):
        """Return the combined catalog table for several tiles.

        Tiles without catalog data are skipped. Returns None when no tile
        has data (including an empty `ids`). The previous ``data is not
        None`` check was always true for a list, so that case raised inside
        ``pd.concat`` instead.
        """
        found = [
            df for df in map(self.get_catalog_data_for_id, ids) if df is not None
        ]
        if not found:
            return None
        return pd.concat(found, ignore_index=True, sort=True)

    def get_catalog_data_for_gold_member(self, name):
        """Return the combined catalog table for the gold IDs stored under `name`.

        Returns None when none of the tiles has catalog data, consistent
        with `get_catalog_data_for_ids`. Columns are not sorted here.
        """
        ids = get_gold_ids(name)
        found = [
            df for df in map(self.get_catalog_data_for_id, ids) if df is not None
        ]
        if not found:
            return None
        return pd.concat(found, ignore_index=True)

    def get_user_markings_for_id(self, user, id_,):
        """Return the markings of `user` on tile `id_`."""
        data = self.db.get_image_id_markings(id_)
        return data.query('user_name == @user')

    def get_user_markings_for_ids(self, user, ids):
        """Return the markings of `user` on all tiles in `ids` as one table."""
        data = [self.get_user_markings_for_id(user, id_) for id_ in ids]
        return pd.concat(data, ignore_index=True, sort=True)

    def get_gold_markings_for_expert(self, expert_id):
        """Return an expert's own markings on the IDs stored under `expert_id`.

        `expert_id` must be a key of `gold_names`.
        """
        user_name = self.gold_names[expert_id][0]
        ids = get_gold_ids(expert_id)
        return self.get_user_markings_for_ids(user_name, ids)

    def get_gold_markings_common(self):
        """Return the markings of all GOLD_MEMBERS on the common gold IDs."""
        data = [
            self.db.get_image_id_markings(id_).query(
                'user_name in @markings.GOLD_MEMBERS')
            for id_ in self.common_ids
        ]
        return pd.concat(data, ignore_index=True, sort=True)
In [ ]:
# Release manager for catalog version 'v1.0'; its savefolder is passed to the
# path helpers below as the data location.
rm = catalog_production.ReleaseManager('v1.0')
In [ ]:
# Display the folder used for this release.
rm.savefolder
In [ ]:
# Path helper for one example ID ('7xp') inside the release folder.
pm = io.PathManager(id_='7xp', datapath=rm.savefolder)
In [ ]:
# Display the final fan file path for that example ID.
pm.final_fanfile
In [ ]:
# Pipeline object used by the following cells to gather expert and catalog data.
pipe = DataPipe()
In [ ]:
# Expert markings keyed by expert initials, plus the markings of all gold
# members on the common ID list under the key 'common'.
gold_data = dict()
for expert in pipe.gold_names:
    print(expert)  # progress output, one line per expert
    expert_frame = pipe.get_gold_markings_for_expert(expert)
    gold_data[expert] = expert_frame
gold_data['common'] = pipe.get_gold_markings_common()
In [ ]:
# Catalog entries for each expert's gold ID list, keyed by expert initials.
catalog_data = dict()
for expert in pipe.gold_names:
    print(expert)  # progress output, one line per expert
    ids = get_gold_ids(expert)
    catalog_for_expert = pipe.get_catalog_data_for_ids(ids)
    catalog_data[expert] = catalog_for_expert
In [ ]:
# Catalog entries for the common gold ID list, stored next to the per-expert tables.
catalog_data['common'] = pipe.get_catalog_data_for_ids(get_gold_ids('common_gold_data'))
In [ ]:
# Sanity check per expert: distinct image_ids present in their markings
# versus the length of their gold ID list.
for expert in pipe.gold_names:
    n_marked = gold_data[expert]['image_id'].nunique()
    n_listed = len(get_gold_ids(expert))
    print(expert, n_marked, n_listed, sep='\n')
In [ ]:
# Per-column count of distinct values in each expert's markings table.
for expert in pipe.gold_names:
    unique_counts = gold_data[expert].nunique()
    print(unique_counts)
In [ ]:
# IPython magic selecting the ipympl backend (only valid in IPython/Jupyter).
%matplotlib ipympl
plt.style.use('tableau-colorblind10')
In [ ]:
# Grid is off by default; the plotting cells switch on a major grid per axes.
plt.rcParams['axes.grid'] = False
plt.rcParams['axes.grid.which'] = 'both'
In [ ]:
# One histogram panel per expert: number of marking rows per image_id in the
# expert's data versus in the catalog data for the same expert key.
# NOTE(review): indentation was lost in this export, so it is unclear whether
# set_xlabel below runs for every axes or only for the last one - confirm.
bins = np.arange(0, 175, 5)
fig, axes = plt.subplots(nrows=3, figsize=(10,7), sharex=False)
for expert,ax in zip(pipe.gold_names.keys(), axes):
ax.grid(which='major', lw=0.5)
# NOTE(review): `data` is not read anywhere in this cell.
data = {expert:gold_data, 'catalog':catalog_data}
# Row counts per image_id.
expdata = gold_data[expert].groupby('image_id').size()
catdata = catalog_data[expert].groupby('image_id').size()
_ = ax.hist([expdata, catdata], bins=bins, log=True, label=[expert, 'catalog'])
ax.legend()
ax.set_ylabel("# of tiles")
ax.set_xlabel('# of fans+blotches per Planet Four tile')
fig.suptitle('Expert vs Catalog object identification frequency')
# Leave room at the top for the suptitle.
fig.subplots_adjust(top=0.95)
fig.savefig('/Users/klay6683/Dropbox/src/p4_paper1/figures/gold_fans_and_blotches_histos.pdf',
dpi=200, bbox_inches='tight')
In [ ]:
# Switch to the 'notebook' plotting context for the following figures.
sns.set_context('notebook')
In [ ]:
# Single panel for the common gold IDs: row counts per image_id for each of
# the three experts, plus the catalog, drawn as one grouped histogram.
expert = 'common'
bins = np.arange(0, 75, 5)
fig, axes = plt.subplots(nrows=1, figsize=(10, 2.5), sharex=False)
axes.grid(which='major', lw=0.5)

expdata = gold_data[expert].groupby(['user_name', 'image_id']).size()
catdata = catalog_data[expert].groupby('image_id').size()

# Indexing by user name selects that user's counts from the two-level result.
samples = [expdata[user] for user in ('Portyankina', 'mschwamb', 'michaelaye')]
samples.append(catdata)
_ = axes.hist(samples, bins=bins, log=True,
              label=['GP', 'MES', 'KMA', 'catalog'])
axes.legend()
axes.set_ylabel("# of tiles")
axes.set_xlabel('# of fans+blotches per Planet Four tile')
axes.set_title("Common Expert data vs Catalog")
fig.savefig(
    '/Users/klay6683/Dropbox/src/p4_paper1/figures/gold_fans_and_blotches_histos_common.pdf',
    dpi=150,
    bbox_inches='tight',
)
In [ ]:
# Close all open figures before creating the next batch.
plt.close('all')
In [ ]:
# One histogram panel per expert, restricted to rows with marking == "fan":
# fan rows per image_id in the expert's data versus in the catalog data.
# NOTE(review): indentation was lost in this export, so it is unclear whether
# set_xlabel below runs for every axes or only for the last one - confirm.
bins = np.arange(0, 85, 5)
fig, axes = plt.subplots(nrows=3, figsize=(10,7), sharex=False)
for expert,ax in zip(pipe.gold_names.keys(), axes):
ax.grid(which='major', lw=0.5)
# NOTE(review): `data` is not read anywhere in this cell.
data = {expert:gold_data, 'catalog':catalog_data}
expdata = gold_data[expert].query('marking=="fan"').groupby('image_id').size()
catdata = catalog_data[expert].query('marking=="fan"').groupby('image_id').size()
_ = ax.hist([expdata, catdata], bins=bins, log=True, label=[expert, 'catalog'])
ax.legend()
ax.set_ylabel("# of tiles")
ax.set_xlabel('# of fans per Planet Four tile')
fig.suptitle('Expert vs Catalog object identification frequency: Fans only')
# Leave room at the top for the suptitle.
fig.subplots_adjust(top=0.95)
fig.savefig('/Users/klay6683/Dropbox/src/p4_paper1/figures/gold_fans_histos.pdf',
dpi=200, bbox_inches='tight')
In [ ]:
# Single panel for the common gold IDs, fans only: fan rows per image_id for
# each of the three experts, plus the catalog.
expert = 'common'
bins = np.arange(0, 60, 5)
fig, axes = plt.subplots(nrows=1, figsize=(10, 2.5), sharex=False)
axes.grid(which='major', lw=0.5)

gold_fans = gold_data[expert].query('marking=="fan"')
catalog_fans = catalog_data[expert].query('marking=="fan"')
expdata = gold_fans.groupby(['user_name', 'image_id']).size()
catdata = catalog_fans.groupby('image_id').size()

samples = [expdata[user] for user in ('Portyankina', 'mschwamb', 'michaelaye')]
samples.append(catdata)
_ = axes.hist(samples, bins=bins, log=True,
              label=['GP', 'MES', 'KMA', 'catalog'])
axes.legend()
axes.set_ylabel("# of tiles")
axes.set_xlabel('# of fans per Planet Four tile')
axes.set_title("Common Expert data vs Catalog: Fans only")
fig.savefig(
    '/Users/klay6683/Dropbox/src/p4_paper1/figures/gold_fans_histos_common.pdf',
    dpi=150,
    bbox_inches='tight',
)
In [ ]:
# One histogram panel per expert, restricted to rows with marking == "blotch":
# blotch rows per image_id in the expert's data versus in the catalog data.
# NOTE(review): indentation was lost in this export, so it is unclear whether
# set_xlabel below runs for every axes or only for the last one - confirm.
bins = np.arange(0, 85, 5)
fig, axes = plt.subplots(nrows=3, figsize=(10,7), sharex=False)
for expert,ax in zip(pipe.gold_names.keys(), axes):
ax.grid(which='major', lw=0.5)
# NOTE(review): `data` is not read anywhere in this cell.
data = {expert:gold_data, 'catalog':catalog_data}
expdata = gold_data[expert].query('marking=="blotch"').groupby('image_id').size()
catdata = catalog_data[expert].query('marking=="blotch"').groupby('image_id').size()
_ = ax.hist([expdata, catdata], bins=bins, log=True, label=[expert, 'catalog'])
ax.legend()
ax.set_ylabel("# of tiles")
ax.set_xlabel('# of blotches per Planet Four tile')
fig.suptitle('Expert vs Catalog object identification frequency: Blotches only')
# Leave room at the top for the suptitle.
fig.subplots_adjust(top=0.95)
fig.savefig('/Users/klay6683/Dropbox/src/p4_paper1/figures/gold_blotches_histos.pdf',
dpi=200, bbox_inches='tight')
In [ ]:
# Single panel for the common gold IDs, blotches only: blotch rows per
# image_id for each of the three experts, plus the catalog.
expert = 'common'
bins = np.arange(0, 60, 5)
fig, axes = plt.subplots(nrows=1, figsize=(10, 2.5), sharex=False)
axes.grid(which='major', lw=0.5)

gold_blotches = gold_data[expert].query('marking=="blotch"')
catalog_blotches = catalog_data[expert].query('marking=="blotch"')
expdata = gold_blotches.groupby(['user_name', 'image_id']).size()
catdata = catalog_blotches.groupby('image_id').size()

samples = [expdata[user] for user in ('Portyankina', 'mschwamb', 'michaelaye')]
samples.append(catdata)
_ = axes.hist(samples, bins=bins, log=True,
              label=['GP', 'MES', 'KMA', 'catalog'])
axes.legend()
axes.set_ylabel("# of tiles")
axes.set_xlabel('# of blotches per Planet Four tile')
axes.set_title("Common Expert data vs Catalog: Blotches only")
fig.savefig(
    '/Users/klay6683/Dropbox/src/p4_paper1/figures/gold_blotches_histos_common.pdf',
    dpi=150,
    bbox_inches='tight',
)
In [ ]:
# Close all open figures before creating the next batch.
plt.close('all')
In [ ]:
# One histogram panel per expert of the `distance` column of fan rows
# (labelled as fan length in pixels), expert data versus catalog data.
# NOTE(review): indentation was lost in this export, so it is unclear whether
# set_xlabel below runs for every axes or only for the last one - confirm.
bins = np.arange(0, 600, 30)
fig, axes = plt.subplots(nrows=3, figsize=(10,7), sharex=False)
for expert,ax in zip(pipe.gold_names.keys(), axes):
ax.grid(which='major', lw=0.5)
# NOTE(review): `data` is not read anywhere in this cell.
data = {expert:gold_data, 'catalog':catalog_data}
expdata = gold_data[expert].query('marking=="fan"').distance
catdata = catalog_data[expert].query('marking=="fan"').distance
_ = ax.hist([expdata, catdata], bins=bins, log=True, label=[expert, 'catalog'])
ax.legend()
ax.set_ylabel("# of fans")
ax.set_xlabel('Fan lengths [pixel]')
fig.suptitle('Fans lengths, expert vs catalog')
# Leave room at the top for the suptitle.
fig.subplots_adjust(top=0.95)
fig.savefig('/Users/klay6683/Dropbox/src/p4_paper1/figures/gold_fan_lengths.pdf',
dpi=200, bbox_inches='tight')
In [ ]:
# Single panel for the common gold IDs: `distance` values of fan rows per
# expert, plus the catalog.
expert = 'common'
bins = np.arange(0, 600, 30)
fig, axes = plt.subplots(nrows=1, figsize=(10, 2.5), sharex=False)
axes.grid(which='major', lw=0.5)

gold_fans = gold_data[expert].query('marking=="fan"')
expdata = gold_fans.groupby(['user_name']).distance
catdata = catalog_data[expert].query('marking=="fan"').distance

samples = [
    expdata.get_group(user)
    for user in ('Portyankina', 'mschwamb', 'michaelaye')
]
samples.append(catdata)
_ = axes.hist(samples, bins=bins, log=True,
              label=['GP', 'MES', 'KMA', 'catalog'])
axes.legend()
axes.set_ylabel("# of fans")
axes.set_xlabel('Fan lengths [pixel]')
axes.set_title("Fan lengths, common expert data vs catalog")
fig.savefig(
    '/Users/klay6683/Dropbox/src/p4_paper1/figures/gold_fan_lengths_common.pdf',
    dpi=150,
    bbox_inches='tight',
)
In [ ]:
# Close all open figures before creating the next batch.
plt.close('all')
In [ ]:
# One histogram panel per expert of radius_1 * radius_2 * pi for blotch rows
# (labelled as blotch area), expert data versus catalog data.
# NOTE(review): indentation was lost in this export, so it is unclear whether
# set_xlabel below runs for every axes or only for the last one - confirm.
# The first bin edge is 300, so smaller values are not counted.
bins = np.arange(300, 120000, 5000)
from math import pi
fig, axes = plt.subplots(nrows=3, figsize=(10,7), sharex=False)
for expert,ax in zip(pipe.gold_names.keys(), axes):
ax.grid(which='major', lw=0.5)
# NOTE(review): `data` is not read anywhere in this cell.
data = {expert:gold_data, 'catalog':catalog_data}
expdata = gold_data[expert].query('marking=="blotch"')[['radius_1', 'radius_2']]
expdata = expdata.radius_1*expdata.radius_2*pi
catdata = catalog_data[expert].query('marking=="blotch"')[['radius_1', 'radius_2']]
catdata = catdata.radius_1*catdata.radius_2*pi
_ = ax.hist([expdata, catdata], bins=bins, log=True, label=[expert, 'catalog'])
ax.legend()
ax.set_ylabel("# of blotches")
ax.set_xlabel('Blotch area [pixel**2]')
fig.suptitle('Blotch area, expert vs catalog')
# Leave room at the top for the suptitle.
fig.subplots_adjust(top=0.95)
fig.savefig('/Users/klay6683/Dropbox/src/p4_paper1/figures/gold_blotch_areas.pdf',
dpi=200, bbox_inches='tight')
In [ ]:
# Single panel for the common gold IDs: radius_1 * radius_2 * pi of blotch
# rows per expert, plus the catalog. Adds an 'area' column to both tables.
from math import pi

expert = 'common'
bins = np.arange(300, 80000, 5000)
fig, axes = plt.subplots(nrows=1, figsize=(10, 2.5), sharex=False)
axes.grid(which='major', lw=0.5)

# The column is written into the stored tables themselves, not into copies.
for table in (gold_data[expert], catalog_data[expert]):
    table['area'] = table.radius_1 * table.radius_2 * pi

gold_blotches = gold_data[expert].query('marking=="blotch"')
expdata = gold_blotches.groupby(['user_name']).area
catdata = catalog_data[expert].query('marking=="blotch"').area

samples = [
    expdata.get_group(user)
    for user in ('Portyankina', 'mschwamb', 'michaelaye')
]
samples.append(catdata)
_ = axes.hist(samples, bins=bins, log=True,
              label=['GP', 'MES', 'KMA', 'catalog'])
axes.legend()
axes.set_ylabel("# of blotches")
axes.set_xlabel('Blotch area [pixel**2]')
axes.set_title("Blotch area, common expert data vs catalog")
fig.savefig(
    '/Users/klay6683/Dropbox/src/p4_paper1/figures/gold_blotch_areas_common.pdf',
    dpi=150,
    bbox_inches='tight',
)
In [ ]:
In [ ]:
# Common gold ID list; read as a module-level name by plot_gold and the loops below.
gold_ids = get_gold_ids('common_gold_data')
In [ ]:
def plot_gold(i):
    """Save a 2x2 comparison figure for the i-th entry of ``gold_ids``.

    Panels: input tile, citizen markings (drawn with
    ``without_users=markings.GOLD_MEMBERS``), clustered catalog data, and
    science-team markings. The figure is written to
    ``./plots/gold_plots/gold_data<NN>.png``.

    Reads the module-level names ``gold_ids`` and ``rm``.
    NOTE(review): ``gold_star_plotter`` is neither defined nor imported in
    the visible part of this file - confirm where it comes from.

    Parameters
    ----------
    i : int
        Position in ``gold_ids``; also used, zero-padded to two digits, in
        the output file name.
    """
    id_ = gold_ids[i]
    p4img = markings.ImageID(id_)

    fig, ax = plt.subplots(ncols=2, nrows=2)
    axes = ax.flatten()
    # Hide the pixel-coordinate axes on all four panels.
    for axis in axes:
        axis.axis('off')

    # Panel 0: the input tile itself.
    p4img.show_subframe(ax=axes[0])
    axes[0].set_title('Planet Four input tile')

    # Panel 1: citizen markings.
    p4img.plot_fans(without_users=markings.GOLD_MEMBERS, ax=axes[1])
    p4img.plot_blotches(without_users=markings.GOLD_MEMBERS, ax=axes[1])
    axes[1].set_title('Citizen Markings')

    # Panel 2: final catalog objects for this ID.
    plotting.plot_finals(id_, datapath=rm.savefolder, ax=axes[2],
                         wind_pointer=True)
    axes[2].set_title('Catalog clustered data.')

    # Panel 3: science-team markings.
    gold_star_plotter(p4img, axes[3], kind='fan')
    gold_star_plotter(p4img, axes[3], kind='blotch')
    axes[3].set_title('Science team markings')

    fig.subplots_adjust(wspace=0.05, hspace=0.15)
    fig.suptitle(id_)

    path = Path("./plots/gold_plots")
    # parents=True: without it mkdir fails when ./plots does not exist yet.
    path.mkdir(parents=True, exist_ok=True)
    fig.savefig(path / f"gold_data{str(i).zfill(2)}.png",
                dpi=150, bbox_inches='tight')
In [ ]:
# Trial run on the first common gold ID.
plot_gold(0)
In [ ]:
# Produce the comparison figure for every common gold ID. A failing ID is
# reported and skipped so one bad ID does not stop the batch.
for i in range(len(gold_ids)):
    print(i)
    try:
        plot_gold(i)
    except Exception as err:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # hid the reason for the failure.
        print(f"plot_gold({i}) failed: {err!r}")
    finally:
        # Close figures after every ID, including failed ones, so open
        # figures do not pile up over the batch.
        plt.close('all')
In [ ]:
# Markings database handle used by the loop below.
# NOTE(review): the name `db` is re-bound to clustering results in later cells.
db = io.DBManager()
In [ ]:
# For every common gold ID with more than 3 catalog fans, store the
# difference between the mean fan angle of the gold members' markings and the
# mean fan angle of the catalog fans. `ids` and `bucket` stay aligned.
# NOTE(review): these are plain arithmetic means of the `angle` column; if the
# angles wrap around (e.g. at 0/360) a circular mean may be needed - confirm.
bucket = []
ids = []
for id_ in gold_ids:
    golddata = db.get_image_id_markings(id_).query('user_name in @markings.GOLD_MEMBERS')
    gold_angle_mean = golddata.query('marking=="fan"').angle.mean()
    pm = io.PathManager(id_=id_, datapath=rm.savefolder)
    # Access the catalog fan table once and keep the try body to the single
    # access that the handler is meant to cover.
    try:
        catalog_fans = pm.final_fandf
    except FileNotFoundError:
        continue
    catalog_angle_mean = catalog_fans.angle.mean()
    if len(catalog_fans) > 3:
        ids.append(id_)
        bucket.append(gold_angle_mean - catalog_angle_mean)
In [ ]:
# Convert to arrays so boolean masks can be used in the next cells.
bucket = np.array(bucket)
ids = np.array(ids)
In [ ]:
# IDs whose gold mean fan angle exceeds the catalog mean by more than 25.
ids[bucket > 25]
In [ ]:
# Show the final catalog objects for one ID.
plotting.plot_finals('c0t', datapath=rm.savefolder)
In [ ]:
# Array form allows the element-wise comparisons below.
gold_ids = np.array(gold_ids)
In [ ]:
# Position of one ID in the common gold list.
np.where(gold_ids=='APF0000c0t')
In [ ]:
gold_ids == 'APF00002aj'
In [ ]:
len(bucket)
In [ ]:
# NOTE(review): this is an upper cut only - large negative deltas are kept.
# NaN entries are dropped as well (NaN < 80 is False), and `ids` is not
# filtered, so `bucket` no longer lines up with `ids` after this cell.
bucket=bucket[bucket < 80]
In [ ]:
bucket.max()
In [ ]:
plt.close('all')
In [ ]:
len(gold_ids)
In [ ]:
# Back to the 'paper' plotting context for the figure below.
sns.set_context('paper')
In [ ]:
# read_csv's ``squeeze`` keyword was deprecated in pandas 1.4 and removed in
# 2.0; squeezing the parsed frame gives the same Series on old and new versions.
bucket2 = pd.read_csv("angle_std_bucket.csv", header=None).squeeze("columns")
In [ ]:
# Bin edges 0..21 in steps of 1, used for the bucket2 histogram in the next cell.
bins = np.arange(0, 22, 1)
In [ ]:
# Two-panel figure: left, histogram of the finite angle deltas in `bucket`;
# right, histogram of the values in `bucket2` using the module-level `bins`.
# NOTE(review): sns.distplot is deprecated in newer seaborn releases.
fig, axes = plt.subplots(constrained_layout=True, ncols=2, figsize=(8, 3))
left, right = axes

finite_deltas = bucket[~np.isnan(bucket)]
sns.distplot(finite_deltas, bins=np.arange(-40, 41, 2), kde=False, ax=left)
left.set_title('Histogram of deltas between science team\nand volunteer mean fan directions.')
left.set_xlabel("Delta mean wind direction per Planet Four tile")
left.set_ylabel("Bin Counts")

sns.distplot(bucket2, kde=False, bins=bins, ax=right)
right.set_title("Histogram of angular STD for merged fan clusters")
right.set_xlabel("Fan angle standard deviation per cluster [deg]")
right.set_ylabel("Bin Counts")

for ax in axes:
    ax.grid()
plt.savefig("/Users/klay6683/Documents/latex_docs/p4_paper1/figures/gold_mean_fan_deltas_histo.pdf")
In [ ]:
from scipy.stats import circstd
In [ ]:
# IPython help lookup for circstd (only valid in IPython/Jupyter).
circstd?
In [ ]:
from scipy.stats import circstd
In [ ]:
In [ ]:
### 2 windows side by side. before and after clustering
# NOTE(review): `p4img` and `clustering` are not defined in the visible part
# of this file, and `golddata` is whatever the angle-delta loop left behind.
fig, ax = plt.subplots(ncols=2, figsize=(12, 5))
fig.tight_layout()
axes = ax.flatten()

# Only the right panel needs the tile image drawn explicitly here.
p4img.show_subframe(ax=axes[1])

# Remove pixel coordinate axes.
for axis in axes:
    axis.axis('off')

# Left panel: citizen fan markings.
p4img.plot_fans(ax=axes[0])
axes[0].set_title('Citizen Markings')

# Right panel: clusters of all markings (solid) ...
db = clustering(p4img.get_fans(), axes[1],
                eps=7,
                min_samples=5, fans=True, ls='-')
axes[1].set_title('All citizens clusters (science team dashed lines)')
# ... overlaid with clusters of the science-team markings (dashed). `db` ends
# up holding the result of this second call.
db = clustering(golddata, axes[1],
                min_samples=2,
                eps=11, fans=True, ls='--')
# Bare ``savefig`` is a pylab name that is not in scope in this notebook.
fig.savefig('MDAP_clustering1.png', dpi=100)
In [ ]:
# Plot the fan markings of the current tile and save them as EPS.
# NOTE(review): `p4img` is not defined in the visible part of this file.
p4img.plot_fans()
# ``title`` / ``savefig`` were bare pylab names; use the plt namespace like
# the rest of the notebook.
plt.title(p4img.imgid)
plt.axis('off')
plt.savefig('/Users/maye/Dropbox/src/planet4_paper1/images/fan_markings.eps', dpi=150)
In [ ]:
# Two stacked panels: clusters of all fan markings (first axes) and clusters
# of the science-team markings in `golddata` (second axes).
# NOTE(review): `p4img` and `clustering` are not defined in the visible part
# of this file.
fig, ax = plt.subplots(ncols=1, nrows=2, figsize=(12, 10))
axes = ax.flatten()
first_panel, second_panel = axes

# Both panels get the tile image drawn first.
for panel in axes:
    p4img.show_subframe(ax=panel)

# Citizen stuff.
all_fans = p4img.get_fans()
db = clustering(all_fans, first_panel, eps=7, min_samples=5, fans=True)
first_panel.set_title('All citizens clusters (including science team)')

# Gold stuff; `db` ends up holding the result of this second call.
db = clustering(golddata, second_panel, min_samples=1, eps=7, fans=True)
second_panel.set_title('Gold data clusters')
In [ ]:
### single window, after clustering
# NOTE(review): `p4img` and `clustering` are not defined in the visible part
# of this file.
fig, ax = plt.subplots(figsize=(12, 9))
axes = ax

# Draw the tile image (was wrapped in a one-pass loop with an unused index).
p4img.show_subframe(ax=axes)

# Cluster all fan markings onto the same axes; later cells read `db.labels_`.
db = clustering(p4img.get_fans(), axes,
                eps=7,
                min_samples=5, fans=True)
plt.axis('off')
plt.savefig('/Users/maye/Dropbox/src/planet4_paper1/images/fans_clustered.eps',
            bbox_inches='tight', dpi=150)
In [ ]:
# Bare ``subplots`` is a pylab name that is not in scope in this notebook.
fig, axes = plt.subplots()
# Gold stuff.
# NOTE(review): this call passes fans=/blotches= while plot_gold calls
# gold_star_plotter with kind=...; one of the two signatures is stale - confirm.
gold_star_plotter(p4img, axes, fans=True, blotches=False)
axes.set_title('Science team markings')
In [ ]:
# Integer label per clustered row, taken from the last clustering result in `db`.
labels = db.labels_.astype('int')
# Distinct labels found.
unique_labels = set(labels)
unique_labels
In [ ]:
# Positions of all rows that were assigned cluster label 2.
# Bare ``argwhere`` is a pylab name that is not in scope in this notebook.
label_members = [index[0] for index in np.argwhere(labels == 2)]
label_members
In [ ]:
# NOTE(review): `ellipse_cols` is not defined in the visible part of this file.
ellipse_cols
In [ ]:
# Rows of the gold markings that belong to the selected cluster, restricted
# to the columns listed in `ellipse_cols`.
data = golddata
ellipse_columns_only = data.loc[:, ellipse_cols]
ellipsedata = ellipse_columns_only.iloc[label_members]
ellipsedata
In [ ]:
# Draw every selected row as a Blotch artist on a fresh axes, then apply the
# subframe extent via markings.set_subframe_size.
fig, ax = plt.subplots()
for _, row in ellipsedata.iterrows():
    ax.add_artist(markings.Blotch(row))
markings.set_subframe_size(ax)